import re
import time
import requests
from tqdm import tqdm
import mysql.connector


def get_reply(url, timeout=10):
    """Download *url* and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection, which would freeze the whole crawl.

    Returns:
        The response body as a ``str`` decoded with the ``gbk`` codec.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    # Send the request with a browser-like User-Agent header.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Force GBK decoding instead of whatever encoding requests would pick.
    response.encoding = 'gbk'
    # Return the HTML source.
    return response.text


def save_mysql(author, book_tilte, chapter, text):
    """Insert one chapter row into the ``qzsd`` table of the ``novel`` database.

    A new connection is opened for every call, the table is created if it
    does not exist yet, the row is inserted and committed, and the cursor
    and connection are closed again -- also when one of the statements
    raises, so a failed insert no longer leaks the connection.

    Args:
        author: Value stored in the ``author`` column.
        book_tilte: Value stored in the ``title`` column.
        chapter: Value stored in the ``chapter`` column.
        text: Value stored in the ``text`` column.
    """
    # NOTE(review): connection settings, including the password, are
    # hard-coded here.
    cnx = mysql.connector.connect(user='root',
                                  password='123456',
                                  host='127.0.0.1',
                                  port=3306,
                                  database='novel')
    try:
        cursor = cnx.cursor()
        try:
            query = '''CREATE TABLE IF NOT EXISTS qzsd(
   id INT(11) AUTO_INCREMENT PRIMARY KEY,
   author VARCHAR(255) NOT NULL,
   title VARCHAR(255) NOT NULL,
   chapter VARCHAR(255) NOT NULL,
   text LONGTEXT)'''
            cursor.execute(query)
            # Parameterized insert: the driver escapes the scraped values.
            sql = "INSERT INTO qzsd (author, title, chapter, text) VALUES (%s, %s,%s, %s)"
            val = (author, book_tilte, chapter, text)
            cursor.execute(sql, val)
            cnx.commit()
        finally:
            cursor.close()
    finally:
        cnx.close()


def parse_reply(html):
    """Extract the chapter title and body text from a chapter page.

    The body is every run of text that sits directly before a ``<br />``
    on the same line, stripped of surrounding whitespace and concatenated
    without a separator. The title is the content of the first
    ``<font color="#dc143c" size="4">`` element.

    Args:
        html: HTML source of the chapter page.

    Returns:
        A ``(title, content)`` tuple of strings.

    Raises:
        IndexError: If the page contains no matching title element.
    """
    line_pattern = re.compile(r'(.*?)<br />')
    fragments = [piece.strip() for piece in line_pattern.findall(str(html))]
    body = ''.join(fragments)
    headings = re.findall('<font color="#dc143c" size="4">(.*?)</font>', html)
    return headings[0], body


def get_book_list(url):
    """Fetch the page at *url* and collect the link targets it lists.

    Only anchors written exactly as ``<a href='...' target='_blank'>``
    (single-quoted attributes) are matched.

    Args:
        url: Address of the listing page.

    Returns:
        A list with the ``href`` value of every matching anchor, in page
        order.
    """
    page = str(get_reply(url))
    link_pattern = re.compile(r"<a href='(.*?)' target='_blank'>")
    return link_pattern.findall(page)


def get_menu(url):
    """Fetch a book's index page and extract author, title and chapter links.

    Chapter links are searched only in the part of the page between the
    first occurrence of ``正文`` and the next ``</tbody>`` after it.

    Args:
        url: Address of the book's index page.

    Returns:
        A ``(author, title, contents)`` tuple where ``contents`` is the
        list of ``href`` values found in that region.

    Raises:
        IndexError: If the author or the title pattern does not match.
    """
    page = str(get_reply(url))
    # str.find returns -1 when a marker is missing; the slice below then
    # uses that negative index unchanged.
    toc_start = page.find(r'正文')
    toc_end = page.find('</tbody>', toc_start + 1)
    toc_html = page[toc_start:toc_end]
    chapter_links = re.compile(r'<a href="(.*?)">').findall(toc_html)
    # The author name is whatever follows the label up to the next space.
    author = re.findall(r'作者：(.*?) ', page)[0]
    title = re.findall(r'<font color="#dc143c">(.*?)</font>', page)[0]
    return author, title, chapter_links


def main():
    """Crawl every book on the listing page and store its chapters in MySQL.

    For each book link the index page is fetched to obtain the author,
    the title and the chapter links; every chapter page is then
    downloaded, parsed and saved. The crawler pauses for one second after
    each book so requests to the site are spaced out. Previously the
    pause sat outside the loop and ran only once, after the whole crawl.
    """
    for book_url in tqdm(get_book_list('https://www.kanunu8.com/book')):
        book_url = 'https://www.kanunu8.com/' + book_url
        author, booktitle, menu_lists = get_menu(book_url)
        for menu_list_url in menu_lists:
            # Chapter pages live next to the book's index.html.
            detail_url = book_url.replace('index.html', menu_list_url)
            html = get_reply(detail_url)
            chapter, txt = parse_reply(html)
            save_mysql(author, booktitle, chapter, txt)
        # Throttle between books.
        time.sleep(1)


# Start the crawl only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
